%dataset_quarterly_sw.m

%Reads the yield and macro spreadsheets, selects the series used in the VAR,
%and returns the data matrix Y together with the matrix X of its lags.

function [Y, X, vars, lev]=dataset_quarterly_sw_restat(esty1,estq1,esty2,estq2,nlags,shock_extract,use_yields,slope)
%DATASET_QUARTERLY_SW_RESTAT  Build the quarterly VAR data set and its lag matrix.
%
%   [Y, X, vars, lev] = dataset_quarterly_sw_restat(esty1, estq1, esty2, estq2, ...
%                           nlags, shock_extract, use_yields, slope)
%
%   Inputs
%     esty1, estq1   year and quarter of the first observation of the sample
%     esty2, estq2   year and quarter of the last observation of the sample
%     nlags          number of lags to build (non-negative integer)
%     shock_extract  1      : term-structure columns first, macro columns after
%                    2 or 3 : macro columns first, term-structure columns after
%     use_yields     0 : macro variables only
%                    1 : 5-year yield and (5-year - 3-month) spread
%                    2 : 5-year yield OR (5-year - fed funds) spread (one column)
%                    3 : Diebold-Li level and slope factors
%     slope          only used when use_yields ~= 0
%                    0 : first yield column (long rate / level) comes first
%                    1 : spread / slope column comes first
%                    (for use_yields == 2 it picks the single column instead)
%
%   Outputs
%     Y     (T-nlags) x nvars matrix: the selected data without the first
%           nlags observations
%     X     (T-nlags) x (nvars*nlags) matrix of lags: first lag of every
%           variable, then second lag of every variable, and so on
%     vars  char matrix with one label row per column of Y
%     lev   nvars x 1 vector of ones (prior for the first own lag of each
%           variable when estimating with a Minnesota prior)
%
%   Errors (raised with identifier 'dataset_quarterly_sw_restat:<tag>')
%     badShockExtract, badUseYields, badSlope, badNlags  invalid selector
%     sampleRange   requested sample is not covered by the spreadsheets
%     outOfRange    a selected observation exceeds 99999
%
%   Reads YieldData.xls or factors.xls (depending on use_yields) and
%   macrodata_SW_tfp_hours2_restat.xls from the MATLAB path.

fname = 'dataset_quarterly_sw_restat';

%Step 0: validate the selectors before any (slow) spreadsheet is read
%--------------------------------------------------------------------
% Failing here with an explicit error replaces the former "print and return",
% which left all outputs unassigned and surfaced in the caller as an
% unrelated "output argument not assigned" error.
if ~(isscalar(shock_extract) && any(shock_extract == [1 2 3]))
    error([fname ':badShockExtract'], 'must select a shock extraction method');
end
if ~(isscalar(use_yields) && any(use_yields == [0 1 2 3]))
    error([fname ':badUseYields'], 'use_yields must be 0, 1, 2 or 3');
end
if use_yields ~= 0 && ~(isscalar(slope) && any(slope == [0 1]))
    error([fname ':badSlope'], 'slope must be 0 or 1 when use_yields is nonzero');
end
if ~(isscalar(nlags) && nlags >= 0 && nlags == round(nlags))
    error([fname ':badNlags'], 'nlags must be a non-negative integer');
end

%Step 1: load yield data and select yields specified in use_yields
%-----------------------------------------------------------------
% ts_order{1} is the column order for slope == 0, ts_order{2} for slope == 1.
% The 10-year GSW yield ('GSW quarterly', K3:K196) used to be read here as
% well but was never used, so that read has been dropped.
switch use_yields
    case 1
        % Range comments below are carried over from the original file:
        % annualized yields, 1959:2 - 2007:3 (last 3 years need updating).
        yield3m = xlsread('YieldData.xls','CRSPme quarterly','D3:D196');     % 3-month yield
        yield5y = xlsread('YieldData.xls','CRSPme quarterly','L3:L196');     % 5-year yield

        yields = [yield5y (yield5y - yield3m) yield3m]; % long rate, spread, short rate
        yieldvars=['    5-year bond yield      '
                   ' Spread (5-year - 3-month) '
                   '     3-month bond yield    '];
        ts_order = {[1 2], [2 1]};

    case 2
        ffr = xlsread('YieldData.xls','CRSPme quarterly','N3:N196');
        yield5y = xlsread('YieldData.xls','CRSPme quarterly','L3:L196');     % quarter-average 5-year yield, annualized

        yields = [yield5y (yield5y - ffr)]; % long rate and spread
        yieldvars=['     5-year bond yield     '
                   'Spread (5-year - Fedfunds) '];
        ts_order = {1, 2};   % only one term-structure column enters the VAR

    case 3
        % Diebold-Li factors extracted from 3-60 month yields:
        % level, slope, curvature; 1959:2 - 2005:2 (per the original comment)
        yields=xlsread('factors.xls','factors_quarterly','F4:H188');

        % Alternative: factors from 3-120 month yields (DRA), 1972:1 - 2000:4
        % yields=xlsread('factors.xls','factors_quarterly','B4:D188');
        yieldvars=['           Level           '
                   '           Slope           '
                   '         Curvature         '];
        ts_order = {[1 2], [2 1]};
end

if use_yields ~= 0
    ts_select = ts_order{slope + 1};
end

%Step 2: load macro data and select macro variables in VAR
%---------------------------------------------------------
bps=0;  % 1 = band-pass filter the macro series below, 0 = leave unfiltered
tr=200; % band-pass: longest cycle length (periods) kept

macro=xlsread('macrodata_SW_tfp_hours2_restat.xls',1,'B7:R198'); % nonfiltered data
% Alternative inputs kept for reference:
%macro=xlsread('macrodata_SW_tfp_hours2.xls',1,'B19:R198'); % nonfiltered data
%macro=xlsread('macrodata_SW_tfp_hours_med.xls',1,'B19:R198'); %filtered data
%macro=xlsread('macrodata_SW_tfp_hours_med_ctrend_iv.xls',1,'B19:R198'); %common trend filtered with tiv
%macro=xlsread('macrodata_SW_tfp_hours_med_ctrend_noiv.xls',1,'B19:R198'); %common trend filtered with no tiv
%macro=xlsread('macrodata_SW_tfp_hours.xls',1,'B19:Q210');  %see xls sheet for details on data: B15 (B19) for the start of 1959q1 (1960q1)

% One label row per spreadsheet column (B..R = 17 columns).
macrovars =   ['  Gross domestic product   '      %1) real gdp
               '         Consumption       '      %2) real c
               '          Investment       '      %3) real i
               '     Federal Funds rate    '      %4) fed funds rate
               '          TFP(BFK2006)     '      %5) TFP from Fernald(2012): non util adjusted
               '        TFPadj(BFK2006)    '      %6) TFP from Fernald(2012): util adjusted (based on BFK(2006))
               '         TFP(BFFK2019)     '      %7) TFP from Fernald(2019): non util adjusted
               '         TFPadj(BFFK2019)  '      %8) TFP from Fernald(2019): util adjusted (based on BFFK(2013))
               '         Inflation         '      %9) GDP inflation
               '       Shiller stock price '      %10) stock price
               '       Hours               '      %11) Hours
               '    Consumption (nd&s)     '      %12) c nondurables & services deflated by PCE
               '    Otrok TFP              '      %13) Otrok TFP
               '    Consumer confidence    '      %14) Consumer confidence: Barsky AER(2014)&BBL(2014)'s E5Y (business conditions expected over the next 5 years)
               'Consumer confidence:index  '      %15) Consumer confidence index: Michigan survey
               '  Shiller stock price (new)'      %16) stock price
               '         CPI inflation     '];    %17) CPI inflation

% Rescaling: the rate series are stored annualized (per the original notes),
% so divide by 4 to obtain quarterly rates; the new stock price is divided by 100.
macro(:,4)=macro(:,4)/4;      % federal funds rate
macro(:,9)=macro(:,9)/4;      % GDP inflation
macro(:,17)=macro(:,17)/4;    % CPI inflation
macro(:,16)=macro(:,16)/100;  % Shiller stock price (new)
% Alternatives tried previously:
%macro(:,4)=log(1+macro(:,4)/4);
%macro(:,9)=log(macro(:,9)/4+1);
%macro(:,10)=macro(:,10)/100;
%macro(:,11)=macro(:,11)/100;

if bps==1
    % NOTE(review): columns 5, 8, 10 and 13 are not filtered, although column 8
    % is the TFP series currently selected below - confirm this is intended.
    for col = [4 9 17 6 7 1 2 3 11 16 12 15 14]
        macro(:,col)=bpass(macro(:,col),2,tr);
    end
end

% Macro variables entering the VAR (column numbers of macrovars above).
macro_select=[8 1 2 11 4 9 ]; % var_data=22 no investment(no stock) + NO Conf + adjusted 2019 TFP
% Other specifications used previously:
%macro_select=[8 1 2 3 11 4 9 ];       % var_data=20 full(no stock) + no Conf
%macro_select=[8 1 2 11 4 9 14 ];      % var_data=21 no investment(no stock) + Conf
%macro_select=[5 1 2 11 4 9 ];         % var_data=22 no investment(no stock) + NO Conf: 2006 TFP
%macro_select=[7 1 2 11 4 9 ];         % var_data=22 no investment(no stock) + NO Conf: 2019 TFP
%macro_select=[8 1 2 3 11 9 ];         % var_data=25 full(no stock) + no Conf + no interest
%macro_select=[7 1 2 3 11 4 9 16 ];    % full
%macro_select=[7 1 2 3 11 4 9 ];       % with no stock
%macro_select=[7 1 2 11 4 9 16 ];      % no inv
%macro_select=[7 1 2 3 11 4 9 16 15 ]; % full + Conf
%macro_select=[7 1 2 3 11 9 14 ];      % (tfp, y, c, h, pai, conf)
%macro_select=[7 1 2 3 9 ];            % tfp, y, c, iv, pai
%macro_select=[7 1 2 3 11 9 ];         % tfp, y, c, iv, h, pai
%macro_select=[7 1 2 3 11 4 9 10 ];    % tfp, y, c, iv, h, pai, sp
%macro_select=[6 1 12 11 4 9 ];        % no consumer confidence
%macro_select=[8 1 12 11 ];            % no consumer confidence
%macro_select=[1 2 3 5 6 7 8];         % no consumer confidence

%Step 3: setup VAR and compute lags
%----------------------------------

% Date of the first spreadsheet row; sample dates are converted to row
% numbers relative to it.
% NOTE(review): the same row numbers index both macro and yields, while the
% yield range comments above say their first row is 1959:2 - confirm that the
% two spreadsheets really start in the same quarter.
%daty1=1960;       % First Year of Data Set
%datq1=1;          % First Quarter of Data Set
daty1=1957;       % First Year of Data Set
datq1=2;          % First Quarter of Data Set

n1 = (esty1-daty1)*4 + ( estq1 - datq1 + 1);
n2 = (esty2-daty1)*4 + ( estq2 - datq1 + 1);

% Give a readable message for samples the spreadsheets do not cover (these
% cases previously failed with a raw indexing error).
navail = size(macro,1);
if use_yields ~= 0
    navail = min(navail, size(yields,1));
end
if n1 <= n2 && (n1 < 1 || n2 > navail)
    error([fname ':sampleRange'], ...
          'requested sample (rows %d to %d) is outside the %d available observations', ...
          n1, n2, navail);
end

macro_data  = macro(n1:n2,macro_select);
macro_names = macrovars(macro_select,:);

if use_yields == 0
    % macro variables only, whatever the shock extraction method
    data = macro_data;
    vars = macro_names;
elseif shock_extract == 1
    % data structure: term structure var + macro var
    data = [yields(n1:n2,ts_select) macro_data];
    vars = [yieldvars(ts_select,:); macro_names];
else
    % shock_extract == 2 or 3
    % data structure: macro var (TFP ordered first) + term structure var
    data = [macro_data yields(n1:n2,ts_select)];
    vars = [macro_names; yieldvars(ts_select,:)];
    % for end-of-quarter i-rates:
    % data=[macro(n1:n2,macro_select(1:3)) ffr(n1:n2) yields(n1:n2,ts_select)];
end

% Reject implausibly large values in the selected sample.
if any(data(:) > 99999)
    error([fname ':outOfRange'], 'dataseries out of range');
end

[T,nvars]=size(data);

% prior for first lag of each variable (if estimation with Minnesota prior)
lev=ones(nvars,1);

% Lag matrix: block p holds lag p of all variables. Preallocating also makes
% X well defined (T x 0) when nlags == 0.
X = zeros(T-nlags, nvars*nlags);
for p=1:nlags
    X(:,1+(p-1)*nvars:p*nvars)=data((nlags+1-p):(T-p),:);
end

% drop the first nlags observations, which are lost through the lagging
Y=data((nlags+1):T,:);